Regular Season Team Shooting Efficiency & Frequency

Author

Louis BT

Published

December 15, 2024

Code: Setup
library(tidyverse)
library(kableExtra)
library(gridExtra)
library(patchwork)
source('half_court_plot.R')

data = read.csv('2024.csv')
pbpdata = read.csv('2024pbpclean.csv')
pbpdata[is.na(pbpdata)] = 0 
pbpdata$shot_off = NA
Code: Data Manipulation
df <- data %>%
  select(team, date, fgm_L_cor3, fga_L_cor3, fgm_R_cor3, fga_R_cor3, fgm_cor3, fgm_ab3, fga_ab3) %>% 
  group_by(team, date) %>% 
  mutate(across(
    .cols = everything(), 
    .fns = ~ as.numeric(gsub('-', '0', .))
  )) %>% summarise(across(where(is.numeric), sum)) %>%
  mutate(across(.cols = everything(), .fns = ~ replace_na(.x, 0)))
#view(df)

pbpdf = pbpdata %>% 
  filter(event_type %in% c('shot_made', 'shot_missed')) %>% 
  select(date, team, shot_distance, `X2fgm`, `X2fga`, `X3fgm`, `X3fga`) %>% 
mutate(
    `fgm_.5` = ifelse((shot_distance < 5) & (`X2fgm` == 1), 1, 0),
    `fga_.5` = ifelse((shot_distance < 5) & (`X2fga` == 1), 1, 0),
    `fgm_5.10` = ifelse((shot_distance >= 5 & shot_distance < 10) & (X2fgm == 1), 1, 0), 
    `fga_5.10` = ifelse((shot_distance >= 5 & shot_distance < 10) & (X2fga == 1), 1, 0), 
    `fgm_10.15` = ifelse((shot_distance >= 10 & shot_distance < 15) & (X2fgm == 1), 1, 0),
    `fga_10.15` = ifelse((shot_distance >= 10 & shot_distance < 15) & (X2fga == 1), 1, 0), 
    `fgm_15.20` = ifelse((shot_distance >= 15 & shot_distance < 20) & (X2fgm == 1), 1, 0),
    `fga_15.20` = ifelse((shot_distance >= 15 & shot_distance < 20) & (X2fga == 1), 1, 0),
    `fgm_20.3` = ifelse((shot_distance >= 20) & (X2fgm == 1), 1, 0),
    `fga_20.3` = ifelse((shot_distance >= 20) & (X2fga == 1), 1, 0), 
    fgm = ifelse((`X2fgm` == 1) | (`X3fgm` == 1) , 1, 0), 
    fga = ifelse((`X2fga` == 1) | (`X3fga` == 1), 1, 0), 
    is3m = ifelse((`X3fgm` == 1), 1, 0), 
    is3a = ifelse((`X3fga` == 1), 1, 0), 
    
    heave_m = ifelse((shot_distance >= 40) & (X3fgm == 1), 1, 0),
    heave_a = ifelse((shot_distance >= 40) & (X3fga == 1), 1, 0)
  ) %>% 
  select(!shot_distance) %>% group_by(date, team) %>% 
  summarise(across(.cols = everything(), sum))
  
comb_df = df %>% 
  left_join(pbpdf, 
            by =c("team" = 'team', 'date' = 'date'))  %>% group_by(date, team) %>% 
  filter(abs(X3fga - fga_L_cor3 - fga_R_cor3 - fga_ab3 - heave_a) <= 1) %>% ungroup() %>% group_by(team) %>%
  select(!date) %>% 
  summarise(across(everything(), sum)) %>% 
  mutate(

  efg = (fgm + 0.5 * X3fgm) / fga,

  # Field Goal Percentage by Distance
  `<5ft%` = `fgm_.5` / `fga_.5`,
  `[5,10)%` = `fgm_5.10` / `fga_5.10`,
  `[10,15)%` = `fgm_10.15` / `fga_10.15`,
  `[15,20)%` = `fgm_15.20` / `fga_15.20`,
  `[20,3)%` = `fgm_20.3` / `fga_20.3`,
  `Lcor3%` = 1.5*fgm_L_cor3 / fga_L_cor3,
  `Rcor3%` = 1.5*fgm_R_cor3 / fga_R_cor3,
  `ab3%` = 1.5*`fgm_ab3` / `fga_ab3`,
  `heave%` = heave_m / heave_a,

  # Weight
  `<5ft_weight` = `fga_.5` / fga,
  `[5,10)_weight` = `fga_5.10` / fga,
  `[10,15)_weight` = `fga_10.15` / fga,
  `[15,20)_weight` = `fga_15.20` / fga,
  `[20,3)_weight` = `fgm_20.3` / fga,
  `L_cor3_weight` = `fga_L_cor3` / fga,
  `R_cor3_weight` =  `fga_R_cor3`/fga,
  `ab3_weight` =  `fga_ab3` / fga,
  `heave_weight` = heave_a / fga
  ) %>% 
  select(!c(fgm_cor3, fgm_L_cor3, fga_L_cor3, fgm_R_cor3, fga_R_cor3, fgm_ab3, fga_ab3, fgm_.5, fga_.5, fgm_5.10, fga_5.10, fgm_10.15, fga_10.15, fgm_15.20, fga_15.20, fgm_20.3, fga_20.3, fgm, fga, heave_m, heave_a, X2fgm, X2fga, X3fgm, X3fga, is3a, is3m)) %>% 
  mutate(across(.cols = !team, .fns = ~.*100))
Code: Graph Creation
gradient <- colorRampPalette(c( "red", "blue"))
palette <- gradient(100)
values <- seq(35, 70, length.out = 100)
teams_efg = comb_df %>% select(`<5ft%`:`ab3%`)

gradient2 = colorRampPalette(c('green', 'purple'))
palette2 = gradient2(100)
values2 = seq(0, 40, length.out = 100)
teams_weight = comb_df %>% select(`<5ft_weight`:`ab3_weight`)

colours = data.frame(x = values, y = rep(1, length(values)), color = palette)
blue_red_grad = ggplot(colours, aes(x = x, y = y)) +
  geom_point(aes(color = x), size = 5, show.legend = FALSE) + 
  scale_color_gradientn(colors = c("red", "blue")) +
  theme_void() + 
  theme(
    axis.text.y = element_text(size = 10), 
    axis.ticks.y = element_line(), 
    )  +
   coord_flip()

colours = data.frame(x = values2, y = rep(1, length(values)), color = palette2)
purple_green_grad = ggplot(colours, aes(x = x, y = y)) +
  geom_point(aes(color = x), size = 5, show.legend = FALSE) + 
  scale_color_gradientn(colors = c("green", "purple")) +
  theme_void() + 
  theme(
    axis.text.y = element_text(size = 10), 
    axis.ticks.y = element_line(), 
    )  +
   coord_flip()

for (i in 1:nrow(comb_df)){
  efg_numbers = as.vector(teams_efg[i, ]) %>% unlist()
  scaled_numbers <- scales::rescale(efg_numbers, to = c(1, length(palette)))
  mapped_colours <- palette[round(scaled_numbers)]
  mapped_colours <- c("", "", mapped_colours)
  p = create_plot(colours = mapped_colours, title = paste0(comb_df$team[i], " EFG% (23-24)"))
  
  efg_weights = as.vector(teams_weight[i, ]) %>% unlist()
  scaled_numbers = scales::rescale(efg_weights, to = c(1, length(palette2)))
  mapped_colours = palette2[round(scaled_numbers)]
  mapped_colours = c("", "", mapped_colours)
  p2 = create_plot(colours = mapped_colours, title = paste0(comb_df$team[i], " Shot Frequency Percent (23-24)"))
  #grid.arrange(p, p2, ncol=2)
  comb_plots = p + blue_red_grad + p2 + purple_green_grad + 
    plot_layout(widths = c(4, 1, 4, 1))
  print(comb_plots)
  
}